{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/local/lib/python3.5/dist-packages/sklearn/cross_validation.py:41: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.\n",
      "  \"This module will be removed in 0.20.\", DeprecationWarning)\n"
     ]
    }
   ],
   "source": [
    "import tensorflow as tf\n",
    "import numpy as np\n",
    "import copy\n",
    "import time\n",
    "from sklearn.cross_validation import train_test_split\n",
    "import os\n",
    "import pickle"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "def layer_norm_all(h, base, num_units, scope):\n",
    "    \"\"\"Layer-normalise `base` blocks packed side by side in `h`.\n",
    "\n",
    "    `h` is reshaped to [-1, base, num_units]; every block of `num_units`\n",
    "    values is normalised with its own mean and variance, the result is\n",
    "    flattened back to [-1, base * num_units] and then scaled and shifted\n",
    "    by a learned gain and bias.\n",
    "\n",
    "    Args:\n",
    "        h: tensor that can be reshaped to [-1, base, num_units].\n",
    "        base: number of blocks packed along the last axis of `h`.\n",
    "        num_units: width of each block.\n",
    "        scope: variable scope in which the gain and bias are created.\n",
    "\n",
    "    Returns:\n",
    "        Tensor of shape [-1, base * num_units].\n",
    "    \"\"\"\n",
    "    with tf.variable_scope(scope):\n",
    "        h_reshape = tf.reshape(h, [-1, base, num_units])\n",
    "        mean = tf.reduce_mean(h_reshape, [2], keep_dims=True)\n",
    "        var = tf.reduce_mean(tf.square(h_reshape - mean), [2], keep_dims=True)\n",
    "        # epsilon keeps rsqrt finite when a block has zero variance\n",
    "        epsilon = tf.constant(1e-3)\n",
    "        rstd = tf.rsqrt(var + epsilon)\n",
    "        h_reshape = (h_reshape - mean) * rstd\n",
    "        h = tf.reshape(h_reshape, [-1, base * num_units])\n",
    "        # The gain and bias must match the flattened width. The size used to\n",
    "        # be hard-coded as 4 * num_units, which is only right for base == 4.\n",
    "        alpha = tf.get_variable('layer_norm_alpha', [base * num_units],\n",
    "                                initializer=tf.constant_initializer(1.0), dtype=tf.float32)\n",
    "        bias = tf.get_variable('layer_norm_bias', [base * num_units],\n",
    "                               initializer=tf.constant_initializer(0.0), dtype=tf.float32)\n",
    "        return (h * alpha) + bias\n",
    "    \n",
    "def layer_norm(x, scope=\"layer_norm\", alpha_start=1.0, bias_start=0.0):\n",
    "    \"\"\"Normalise `x` along axis 1, then apply a learned gain and bias.\n",
    "\n",
    "    Args:\n",
    "        x: tensor whose static second dimension gives the parameter width.\n",
    "        scope: variable scope in which 'alpha' and 'bias' are created.\n",
    "        alpha_start: initial value of the gain.\n",
    "        bias_start: initial value of the bias.\n",
    "\n",
    "    Returns:\n",
    "        alpha * (x - mean) / scale + bias, where mean and scale come from\n",
    "        `moments_for_layer_norm(x)`.\n",
    "    \"\"\"\n",
    "    with tf.variable_scope(scope):\n",
    "        width = x.get_shape().as_list()[1]\n",
    "        gain_init = tf.constant_initializer(alpha_start)\n",
    "        gain = tf.get_variable('alpha', [width], initializer=gain_init, dtype=tf.float32)\n",
    "        shift_init = tf.constant_initializer(bias_start)\n",
    "        shift = tf.get_variable('bias', [width], initializer=shift_init, dtype=tf.float32)\n",
    "        # the helper's second value is already a square root, so divide by it directly\n",
    "        centre, scale = moments_for_layer_norm(x)\n",
    "        centred = x - centre\n",
    "        normalised = (gain * centred) / scale + shift\n",
    "    return normalised\n",
    "\n",
    "def moments_for_layer_norm(x, axes=1, name=None):\n",
    "    \"\"\"Return the mean of `x` and its epsilon-stabilised spread over `axes`.\n",
    "\n",
    "    Despite the function name, the second value is\n",
    "    sqrt(mean((x - mean)^2) + 1e-3), i.e. a standard deviation rather than\n",
    "    a variance. Both results keep the reduced axes with size 1.\n",
    "\n",
    "    Args:\n",
    "        x: tensor to reduce.\n",
    "        axes: a single axis or a list of axes to reduce over.\n",
    "        name: accepted but not used.\n",
    "    \"\"\"\n",
    "    reduce_axes = axes if isinstance(axes, list) else [axes]\n",
    "    mean = tf.reduce_mean(x, reduce_axes, keep_dims=True)\n",
    "    squared_dev = tf.square(x - mean)\n",
    "    spread = tf.sqrt(tf.reduce_mean(squared_dev, reduce_axes, keep_dims=True) + 1e-3)\n",
    "    return mean, spread\n",
    "\n",
    "def zoneout(new_h, new_c, h, c, h_keep, c_keep, is_training):\n",
    "    \"\"\"Blend the new LSTM state with the previous one using zoneout gates.\n",
    "\n",
    "    Each gate starts as ones, is passed through `tf.nn.dropout` when\n",
    "    `is_training` is truthy, and is then multiplied by its keep value.\n",
    "    When `is_training` is falsy the gate is therefore the constant keep\n",
    "    value, which gives a fixed weighted average of new and old state.\n",
    "\n",
    "    Args:\n",
    "        new_h, new_c: candidate hidden and cell state.\n",
    "        h, c: previous hidden and cell state.\n",
    "        h_keep, c_keep: keep values for the h and c gates.\n",
    "        is_training: Python flag selecting the random gate.\n",
    "\n",
    "    Returns:\n",
    "        The blended (h, c) pair.\n",
    "    \"\"\"\n",
    "    def _mix(candidate, previous, gate):\n",
    "        # gate weights the candidate, (1 - gate) weights the previous value\n",
    "        return candidate * gate + (-gate + 1.) * previous\n",
    "\n",
    "    gate_c = tf.ones_like(c)\n",
    "    gate_h = tf.ones_like(h)\n",
    "    if is_training:\n",
    "        gate_c = tf.nn.dropout(gate_c, c_keep)\n",
    "        gate_h = tf.nn.dropout(gate_h, h_keep)\n",
    "    # undo the rescaling that dropout applies to the surviving entries\n",
    "    gate_c = gate_c * c_keep\n",
    "    gate_h = gate_h * h_keep\n",
    "    return _mix(new_h, h, gate_h), _mix(new_c, c, gate_c)\n",
    "\n",
    "class LN_LSTMCell(tf.contrib.rnn.RNNCell):\n",
    "    \"\"\"LSTM cell with layer-normalised gates and optional zoneout.\n",
    "\n",
    "    The recurrent state is the pair (h, c), each of width `num_units`.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, num_units, f_bias=1.0, use_zoneout=False,\n",
    "                 zoneout_keep_h = 0.9, zoneout_keep_c = 0.5,\n",
    "                 is_training = True,reuse=None, name=None):\n",
    "        \"\"\"Store the cell configuration.\n",
    "\n",
    "        Args:\n",
    "            num_units: width of the hidden and cell state.\n",
    "            f_bias: constant added to the forget gate before the sigmoid.\n",
    "            use_zoneout: when true, `zoneout` is applied to the new state.\n",
    "            zoneout_keep_h: keep value handed to `zoneout` for h.\n",
    "            zoneout_keep_c: keep value handed to `zoneout` for c.\n",
    "            is_training: Python flag forwarded to `zoneout`.\n",
    "            reuse: forwarded to the base class as `_reuse`.\n",
    "            name: forwarded to the base class.\n",
    "        \"\"\"\n",
    "        super(LN_LSTMCell, self).__init__(_reuse=reuse, name=name)\n",
    "        self.num_units = num_units\n",
    "        self.f_bias = f_bias\n",
    "        self.use_zoneout  = use_zoneout\n",
    "        self.zoneout_keep_h = zoneout_keep_h\n",
    "        self.zoneout_keep_c = zoneout_keep_c\n",
    "        self.is_training = is_training\n",
    "\n",
    "    def build(self, inputs_shape):\n",
    "        \"\"\"Create W_xh, W_hh and the gate bias (4 * num_units columns each).\"\"\"\n",
    "        w_init = tf.orthogonal_initializer(1.0)\n",
    "        h_init = tf.orthogonal_initializer(1.0)\n",
    "        b_init = tf.constant_initializer(0.0)\n",
    "        h_size = self.num_units\n",
    "        self.W_xh = tf.get_variable('W_xh',[inputs_shape[1], 4 * h_size], initializer=w_init, dtype=tf.float32)\n",
    "        self.W_hh = tf.get_variable('W_hh',[h_size, 4 * h_size], initializer=h_init, dtype=tf.float32)\n",
    "        self.bias = tf.get_variable('bias', [4 * h_size], initializer=b_init, dtype=tf.float32)\n",
    "\n",
    "    def call(self, x, state):\n",
    "        \"\"\"Advance the cell by one time step.\n",
    "\n",
    "        Args:\n",
    "            x: input for this step.\n",
    "            state: pair (h, c) from the previous step.\n",
    "\n",
    "        Returns:\n",
    "            (output, new_state), where output is the new h and new_state\n",
    "            is the pair (new_h, new_c).\n",
    "        \"\"\"\n",
    "        h, c = state\n",
    "        h_size = self.num_units\n",
    "        concat = tf.concat(axis=1, values=[x, h])\n",
    "        W_full = tf.concat(axis=0, values=[self.W_xh, self.W_hh])\n",
    "        concat = tf.matmul(concat, W_full) + self.bias\n",
    "        # normalise the four gate pre-activations block by block\n",
    "        concat = layer_norm_all(concat, 4, h_size, 'ln')\n",
    "        i, j, f, o = tf.split(axis=1, num_or_size_splits=4, value=concat)\n",
    "        new_c = c * tf.sigmoid(f + self.f_bias) + tf.sigmoid(i) * tf.tanh(j)\n",
    "        new_h = tf.tanh(layer_norm(new_c, 'ln_c')) * tf.sigmoid(o)\n",
    "        if self.use_zoneout:\n",
    "            new_h, new_c = zoneout(new_h, new_c, h, c, self.zoneout_keep_h,\n",
    "                                   self.zoneout_keep_c, self.is_training)\n",
    "        # An RNN cell returns (output, next_state). Returning (new_h, new_c)\n",
    "        # handed new_c back as the whole state, which cannot be unpacked\n",
    "        # into (h, c) on the following step.\n",
    "        return new_h, (new_h, new_c)\n",
    "\n",
    "    def zero_state(self, batch_size, dtype):\n",
    "        \"\"\"Return an all-zero (h, c) pair for `batch_size` sequences.\"\"\"\n",
    "        h = tf.zeros([batch_size, self.num_units], dtype=dtype)\n",
    "        c = tf.zeros([batch_size, self.num_units], dtype=dtype)\n",
    "        return (h, c)\n",
    "\n",
    "    @property\n",
    "    def state_size(self):\n",
    "        \"\"\"Sizes of the (h, c) state pair.\"\"\"\n",
    "        # one entry per state component, mirroring zero_state and call\n",
    "        return (self.num_units, self.num_units)\n",
    "\n",
    "    @property\n",
    "    def output_size(self):\n",
    "        \"\"\"Width of the per-step output (the hidden state h).\"\"\"\n",
    "        return self.num_units"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "class Model:\n",
    "    \"\"\"Stacked layer-norm LSTM classifier built on the default graph.\n",
    "\n",
    "    Graph nodes exposed as attributes:\n",
    "        X: float32 placeholder of shape [None, None, dimension_input].\n",
    "        Y: float32 placeholder of shape [None, dimension_output].\n",
    "        keep_prob: scalar output keep probability of the dropout wrapper;\n",
    "            defaults to `dropout_keep_prob` and can be fed (for example\n",
    "            1.0) to disable dropout for a particular run.\n",
    "        logits: class scores computed from the last time step.\n",
    "        cost: mean softmax cross-entropy plus the L2 penalty.\n",
    "        optimizer: Adam update op minimising `cost`.\n",
    "        correct_pred, accuracy: argmax agreement between logits and Y.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, num_layers, size_layer, dimension_input, dimension_output, learning_rate,\n",
    "                 dropout_keep_prob=0.5, l2_reg=0.0005):\n",
    "        \"\"\"Build the graph.\n",
    "\n",
    "        Args:\n",
    "            num_layers: number of stacked recurrent layers.\n",
    "            size_layer: units per recurrent layer.\n",
    "            dimension_input: size of each input vector.\n",
    "            dimension_output: number of classes.\n",
    "            learning_rate: Adam learning rate.\n",
    "            dropout_keep_prob: default value of the `keep_prob` node.\n",
    "            l2_reg: coefficient applied to tf.nn.l2_loss of every\n",
    "                trainable variable.\n",
    "        \"\"\"\n",
    "        def lstm_cell():\n",
    "            return tf.contrib.rnn.LayerNormBasicLSTMCell(size_layer)\n",
    "        self.rnn_cells = tf.nn.rnn_cell.MultiRNNCell([lstm_cell() for _ in range(num_layers)])\n",
    "        self.X = tf.placeholder(tf.float32, [None, None, dimension_input])\n",
    "        self.Y = tf.placeholder(tf.float32, [None, dimension_output])\n",
    "        # A feedable keep probability: the value used to be baked into the\n",
    "        # graph as 0.5, so dropout could never be switched off.\n",
    "        self.keep_prob = tf.placeholder_with_default(\n",
    "            tf.constant(dropout_keep_prob, dtype=tf.float32), shape=[])\n",
    "        drop = tf.contrib.rnn.DropoutWrapper(self.rnn_cells, output_keep_prob = self.keep_prob)\n",
    "        self.outputs, self.last_state = tf.nn.dynamic_rnn(drop, self.X, dtype = tf.float32)\n",
    "        self.rnn_W = tf.Variable(tf.random_normal((size_layer, dimension_output)))\n",
    "        self.rnn_B = tf.Variable(tf.random_normal([dimension_output]))\n",
    "        # classify from the output of the final time step only\n",
    "        self.logits = tf.matmul(self.outputs[:, -1], self.rnn_W) + self.rnn_B\n",
    "        self.cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits = self.logits, labels = self.Y))\n",
    "        # the penalty covers every trainable variable created so far\n",
    "        l2 = sum(l2_reg * tf.nn.l2_loss(tf_var) for tf_var in tf.trainable_variables())\n",
    "        self.cost += l2\n",
    "        self.optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(self.cost)\n",
    "        self.correct_pred = tf.equal(tf.argmax(self.logits, 1), tf.argmax(self.Y, 1))\n",
    "        self.accuracy = tf.reduce_mean(tf.cast(self.correct_pred, tf.float32))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "maxlen = 20\n",
    "location = os.getcwd()\n",
    "num_layers = 2\n",
    "size_layer = 256\n",
    "learning_rate = 1e-7\n",
    "batch = 100"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "def _read_pickle(path):\n",
    "    \"\"\"Open `path` in binary mode and return the unpickled object.\"\"\"\n",
    "    with open(path, 'rb') as fopen:\n",
    "        return pickle.load(fopen)\n",
    "\n",
    "# NOTE(review): pickle.load can execute arbitrary code, so these files\n",
    "# must come from a trusted source.\n",
    "df = _read_pickle('dataset-emotion.p')\n",
    "vectors = _read_pickle('vector-emotion.p')\n",
    "dictionary = _read_pickle('dataset-dictionary.p')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Distinct values of the label column; their count is used as the number of classes.\n",
    "label = np.unique(df[:,1])\n",
    "# Seed the shuffle so the train/validation partition is identical on every\n",
    "# run; without it each restart evaluates on a different held-out set.\n",
    "SPLIT_SEED = 42\n",
    "train_X, test_X, train_Y, test_Y = train_test_split(\n",
    "    df[:,0], df[:, 1].astype('int'), test_size = 0.2, random_state = SPLIT_SEED)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "'unwarrentedly'\n",
      "epoch: 0 , pass acc: 0 , current acc: 0.138139255778665\n",
      "time taken: 472.36148166656494\n",
      "epoch: 1 , training loss: nan , training acc: 0.13735752851363153 , valid loss: nan , valid acc: 0.138139255778665\n",
      "'unwarrentedly'\n",
      "time taken: 469.41162180900574\n",
      "epoch: 2 , training loss: nan , training acc: 0.13735752851363153 , valid loss: nan , valid acc: 0.138139255778665\n",
      "'unwarrentedly'\n",
      "time taken: 468.6463506221771\n",
      "epoch: 3 , training loss: nan , training acc: 0.13735752851363153 , valid loss: nan , valid acc: 0.138139255778665\n"
     ]
    }
   ],
   "source": [
    "tf.reset_default_graph()\n",
    "sess = tf.InteractiveSession()\n",
    "model = Model(num_layers, size_layer, vectors.shape[1], label.shape[0], learning_rate)\n",
    "sess.run(tf.global_variables_initializer())\n",
    "dimension = vectors.shape[1]\n",
    "saver = tf.train.Saver(tf.global_variables())\n",
    "\n",
    "def build_batch(texts, targets, start):\n",
    "    \"\"\"Embed `batch` consecutive sentences beginning at index `start`.\n",
    "\n",
    "    Each sentence is cut to its first `maxlen` tokens and right-aligned in\n",
    "    a zero-padded (maxlen, dimension) block. Tokens missing from\n",
    "    `dictionary` keep their zero row. Targets become one-hot rows.\n",
    "\n",
    "    Returns:\n",
    "        (batch_x, batch_y) with shapes (batch, maxlen, dimension) and\n",
    "        (batch, len(label)).\n",
    "    \"\"\"\n",
    "    batch_x = np.zeros((batch, maxlen, dimension))\n",
    "    batch_y = np.zeros((batch, len(label)))\n",
    "    for k in range(batch):\n",
    "        tokens = texts[start + k].split()[:maxlen]\n",
    "        for no, text in enumerate(tokens[::-1]):\n",
    "            try:\n",
    "                row = dictionary[text]\n",
    "            except KeyError:\n",
    "                # out-of-vocabulary token: leave its position as zeros\n",
    "                # (printing every miss flooded the cell output)\n",
    "                continue\n",
    "            batch_x[k, -1 - no, :] = vectors[row, :]\n",
    "        batch_y[k, int(targets[start + k])] = 1.0\n",
    "    return batch_x, batch_y\n",
    "\n",
    "def run_epoch(texts, targets, train):\n",
    "    \"\"\"Run one pass over the full batches and return (mean loss, mean accuracy).\n",
    "\n",
    "    When `train` is true the optimizer op is run as well. Loss and\n",
    "    accuracy come from the same session call, so no extra forward pass\n",
    "    is spent on measuring training accuracy.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: if there are fewer than `batch` examples.\n",
    "    \"\"\"\n",
    "    n_batches = texts.shape[0] // batch\n",
    "    if n_batches == 0:\n",
    "        raise ValueError('need at least one full batch of %d examples' % batch)\n",
    "    fetches = [model.cost, model.accuracy]\n",
    "    if train:\n",
    "        fetches.append(model.optimizer)\n",
    "    total_loss, total_acc = 0.0, 0.0\n",
    "    for i in range(0, n_batches * batch, batch):\n",
    "        batch_x, batch_y = build_batch(texts, targets, i)\n",
    "        results = sess.run(fetches, feed_dict = {model.X : batch_x, model.Y : batch_y})\n",
    "        total_loss += results[0]\n",
    "        total_acc += results[1]\n",
    "    return total_loss / n_batches, total_acc / n_batches\n",
    "\n",
    "# Stop once validation accuracy has failed to improve for EARLY_STOPPING\n",
    "# consecutive epochs; CURRENT_CHECKPOINT counts those epochs.\n",
    "EARLY_STOPPING, CURRENT_CHECKPOINT, CURRENT_ACC, EPOCH = 10, 0, 0, 0\n",
    "while True:\n",
    "    lasttime = time.time()\n",
    "    if CURRENT_CHECKPOINT == EARLY_STOPPING:\n",
    "        print('break epoch:', EPOCH)\n",
    "        break\n",
    "    train_loss, train_acc = run_epoch(train_X, train_Y, True)\n",
    "    test_loss, test_acc = run_epoch(test_X, test_Y, False)\n",
    "    if not (np.isfinite(train_loss) and np.isfinite(test_loss)):\n",
    "        # a NaN/inf loss will not recover; stop before checkpointing such a model\n",
    "        print('stop: non-finite loss at epoch', EPOCH, ', training loss:', train_loss, ', valid loss:', test_loss)\n",
    "        break\n",
    "    if test_acc > CURRENT_ACC:\n",
    "        print('epoch:', EPOCH, ', pass acc:', CURRENT_ACC, ', current acc:', test_acc)\n",
    "        CURRENT_ACC = test_acc\n",
    "        CURRENT_CHECKPOINT = 0\n",
    "        saver.save(sess, os.path.join(os.getcwd(), 'model-rnn-vector.ckpt'))\n",
    "    else:\n",
    "        CURRENT_CHECKPOINT += 1\n",
    "    EPOCH += 1\n",
    "    print('time taken:', time.time()-lasttime)\n",
    "    print('epoch:', EPOCH, ', training loss:', train_loss, ', training acc:', train_acc, ', valid loss:', test_loss, ', valid acc:', test_acc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
